/* $Id$ */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <pinyin.h>
#include "safestring.h"

/* One Hanzi phrase stored under a pinyin key; nodes form a singly linked
   list hanging off KeyPhrase.hzph. */
typedef struct _HzPhrase
{
  /* phrase bytes: the code below copies 2 bytes per character and then
     appends a terminating '\0' */
  u_char hz[MAX_PHRASE_LEN * 2 + 1];
  /* frequency value; written to the output file right after the phrase */
  u_char freq;
  /* next phrase sharing the same key, NULL at the end of the list */
  struct _HzPhrase *next;
}
HzPhrase;

/* All phrases that share one pinyin key sequence.  Nodes are chained per
   first syllable through phtab[]. */
typedef struct _KeyPhrase
{
  /* number of syllables (= number of Hanzi) in every phrase of this node */
  u_char len;
  /* packed pinyin keys as built by LoadPhraseFromFile: key[0] collects bit 8
     of each syllable key (syllable i in bit i), key[1..len] hold the low
     8 bits; only the first len + 1 bytes are used */
  u_char key[2 * MAX_PHRASE_LEN + 1];
  /* number of HzPhrase items in the hzph list */
  u_char count;			// number of Phrase items
  /* head of the list of phrases for this key */
  HzPhrase *hzph;
  /* next key sequence with the same first syllable, NULL at the end */
  struct _KeyPhrase *next;
}
KeyPhrase, *PKeyPhrase;

/* pytab[c][n]: pinyin spellings whose first letter is 'a' + c, each paired
   with its numeric key; filled by LoadTable.  A zero key ends a row. */
PinYin pytab[26][MAX_EACH_PY];
/* hztab[k]: Hanzi string read from the map file for pinyin key k. */
u_char hztab[MAX_PY_NUM][MAX_EACH_HZ];
/* phtab[k]: head of the KeyPhrase list whose first syllable has key k. */
PKeyPhrase phtab[MAX_PY_NUM];
/* phcount[k]: number of KeyPhrase nodes currently linked in phtab[k]. */
u_short phcount[MAX_PY_NUM];

/*
 * Fill the hztab and pytab tables from the pinyin map file `pathname`.
 *
 * Every non-blank line is split on whitespace into a pinyin spelling and an
 * optional Hanzi string.  Entries receive consecutive keys starting at 1.
 * The spelling is stored in the pytab row selected by its first letter, and
 * the Hanzi string in hztab[key].  The row position restarts at 0 whenever
 * the first letter differs from the previous line's, so the file is expected
 * to be grouped by first letter.
 *
 * Malformed or surplus entries are reported on stderr and skipped instead of
 * being written outside the tables.  The last slot of each pytab row is kept
 * free so the zero key that terminates a row lookup is always present.
 *
 * Exits the process when the file cannot be opened.  Always returns 0.
 */
int
LoadTable (char *pathname)
{
  static const char delims[] = " \f\n\r\t\v";
  const int max_keys = (int) (sizeof (hztab) / sizeof (hztab[0]));
  const int max_row = (int) (sizeof (pytab[0]) / sizeof (pytab[0][0]));
  FILE *stream;
  char str[250], *strpy, *strhz;
  int i = 1, j = 0, lastpy = 0, curpy;

  if ((stream = fopen (pathname, "r")) == NULL)
    {
      fprintf (stderr, "%s file not found\n", pathname);
      exit (1);
    }

  while (fgets (str, sizeof (str), stream) != NULL)
    {
      strpy = strtok (str, delims);
      if (strpy == NULL)
	continue;		/* blank line */
      strhz = strtok (NULL, delims);	/* may be NULL: no Hanzi given */

      curpy = strpy[0] - 'a';
      if (curpy < 0 || curpy > 25)
	{
	  fprintf (stderr, "Pinyin %s error!!! Found illegal key: %d\n",
		   strpy, curpy);
	  continue;
	}
      if (i >= max_keys)
	{
	  fprintf (stderr, "Too many pinyin entries, rest of %s ignored\n",
		   pathname);
	  break;
	}

      if (curpy != lastpy)
	j = 0;
      lastpy = curpy;
      if (j >= max_row - 1)
	{
	  /* keep the final slot zeroed as the row terminator */
	  fprintf (stderr, "Too many pinyin entries for '%c', %s ignored\n",
		   strpy[0], strpy);
	  continue;
	}

      if (strhz != NULL)
	safe_strncpy (hztab[i], strhz, MAX_EACH_HZ);
      safe_strncpy (pytab[curpy][j].py, strpy, MAX_PY_LEN);
      pytab[curpy][j].key = i;
      i++, j++;
    }
  fclose (stream);
  return 0;
}

/*
 * Split strbuf into tokens separated by spaces and tabs.
 *
 * strbuf  NUL-terminated input; it is not modified.
 * len     size in bytes of one row of strarr (including the '\0').
 * strarr  receives one NUL-terminated token per row, in input order.
 *
 * A token longer than len - 1 bytes is truncated so that it never runs past
 * its row.  The number of rows is not known to this function: the caller
 * must supply at least as many rows as the input can contain tokens.
 *
 * Returns the number of tokens stored.
 */
int
String2Array (char *strbuf, int len, char strarr[][len])
{
  int i = 0, count = 0, buflen = strlen (strbuf);

  while (i < buflen)
    {
      int start, toklen;

      while (i < buflen && (strbuf[i] == ' ' || strbuf[i] == '\t'))
	i++;			// skip space
      start = i;
      while (i < buflen && strbuf[i] != ' ' && strbuf[i] != '\t')
	i++;			// skip non-space

      toklen = i - start;
      if (toklen > 0)
	{
	  if (toklen > len - 1)
	    toklen = len - 1;	// clamp to the row, leaving room for '\0'
	  memcpy (strarr[count], strbuf + start, toklen);
	  strarr[count][toklen] = '\0';
	  count++;
	}
    }
  return count;
}

/*
 * Insert one phrase into the in-memory phrase table.
 *
 * str   phrase bytes; the first 2 * len bytes are used.
 * key   packed pinyin keys, len + 1 bytes (see LoadPhraseFromFile).
 * len   number of syllables in the phrase.
 * freq  frequency value to store with the phrase.
 *
 * The list is selected by the 9-bit key of the first syllable (bit 0 of
 * key[0] and key[1]).  If a KeyPhrase with the same key sequence exists, the
 * phrase is appended to it unless it is already present; otherwise a new
 * KeyPhrase is appended to the list.
 *
 * Returns 1 if the phrase was stored, 0 if it was ignored (single-character
 * phrase, duplicate, first-syllable key outside phtab, or the per-key phrase
 * counter is full).  Aborts when len exceeds MAX_PHRASE_LEN and exits when
 * memory runs out.
 */
int
SavePhraseToMem (char *str, u_char * key, u_char len, u_char freq)
{
  PKeyPhrase kph, tail = NULL, newkph;
  HzPhrase *hzph, *last, *newhz;
  int ahead;

  /* single char phrase ignored */
  if (len < 2)
    return 0;
  if (len > MAX_PHRASE_LEN)
    {
      fprintf (stderr, "buffer overrun\n");
      abort ();
    }

  ahead = key[1] | ((key[0] & 0x01) << 8);
  if ((size_t) ahead >= sizeof (phtab) / sizeof (phtab[0]))
    {
      fprintf (stderr, "Phrase key %d out of range, ignored!\n", ahead);
      return 0;
    }

  /* look for an existing node with the same key sequence; `tail` ends up
     on the last node so a new one can be appended afterwards */
  for (kph = phtab[ahead]; kph != NULL; tail = kph, kph = kph->next)
    {
      if (kph->len != len || memcmp (kph->key, key, len + 1) != 0)
	continue;

      last = NULL;
      for (hzph = kph->hzph; hzph != NULL; hzph = hzph->next)
	{
	  if (!memcmp (hzph->hz, str, 2 * len))	// same phrase
	    {
	      fprintf (stderr,
		       "Duplicate phrase %s detected, ignored!\n", hzph->hz);
	      return 0;
	    }
	  last = hzph;
	}

      /* count is written to the file as a single byte; do not let it wrap */
      if (kph->count == 0xff)
	{
	  fprintf (stderr, "Too many phrases for one key, %.*s ignored!\n",
		   2 * len, str);
	  return 0;
	}

      if ((newhz = malloc (sizeof (*newhz))) == NULL)
	{
	  fprintf (stderr, "no enough memory\n");
	  exit (1);
	}
      newhz->freq = freq;	/* keep the caller's frequency */
      newhz->next = NULL;
      memcpy (newhz->hz, str, len * 2);	/* len <= MAX_PHRASE_LEN */
      newhz->hz[len * 2] = '\0';

      if (last != NULL)
	last->next = newhz;
      else
	kph->hzph = newhz;
      kph->count++;
      return 1;
    }

  // not found, no matched pinyin keyphrase: allocate a new one
  if ((newkph = malloc (sizeof (*newkph))) == NULL
      || (newhz = malloc (sizeof (*newhz))) == NULL)
    {
      fprintf (stderr, "no enough memory\n");
      exit (1);
    }

  newhz->freq = freq;
  newhz->next = NULL;
  memcpy (newhz->hz, str, len * 2);	/* len <= MAX_PHRASE_LEN */
  newhz->hz[len * 2] = '\0';

  newkph->len = len;
  newkph->count = 1;
  memcpy (newkph->key, key, len + 1);	/* len <= MAX_PHRASE_LEN */
  newkph->hzph = newhz;
  newkph->next = NULL;

  if (tail == NULL)
    phtab[ahead] = newkph;	// first phrase of this pinyin
  else
    tail->next = newkph;
  phcount[ahead]++;
  return 1;
}

int file_size = 0;

int
SavePhraseToFile (char *pathname)
{
  FILE *out;
  KeyPhrase *kph, *kphtmp;
  HzPhrase *hzph, *hzphtmp;
  u_char key[MAX_PHRASE_LEN + 1], len, count, freq;
  unsigned int j = 0, k = 0, itemcount = 0;

  if ((out = fopen (pathname, "wb")) == NULL)
    {
      fprintf (stderr, "Couldn't open \"%s\".\n", pathname);
      exit (1);
    }

  for (j = 1; j < MAX_PY_NUM; j++)
    {
      kph = phtab[j];

      file_size += 2;		// u_short

      /* No Char 
         if ((count = strlen(hztab[j])/2) > 0)
         {
         phcount[j]++;
         fwrite(&(phcount[j]),sizeof(phcount[j]),1,out);
         // output chars

         len = 1;
         fwrite(&len,sizeof(len),1,out);
         key[0] = j >> 8; key[1] = j & 0xFF;
         fwrite(&count,sizeof(count),1,out);
         fwrite(key,sizeof(char),2,out);
         for (k=0; k < count; k++)
         {
         fwrite(&(hztab[j][k*2]),sizeof(char),2,out);
         freq = 0; 
         fwrite(&freq,sizeof(freq),1,out);
         }
         file_size += 1 + 1 + 2 + count*(2+1);
         }
         else
       */

      //itemcount += phcount[j];
      fwrite (&(phcount[j]), sizeof (phcount[j]), 1, out);

      while (kph != NULL)
	{
	  hzph = kph->hzph;
	  kphtmp = kph;
	  kph = kph->next;

	  len = kphtmp->len;
	  if (len > MAX_PHRASE_LEN)
	    {
	      fprintf (stderr, "buffer overrun\n");
	      abort ();
	    }
	  memcpy (key, kphtmp->key, len + 1);
	  fwrite (&len, sizeof (char), 1, out);
	  fwrite (&(kphtmp->count), sizeof (kphtmp->count), 1, out);
	  fwrite (key, sizeof (char), len + 1, out);

	  /* len, key[len+1], count, phrase, freq , phrase, freq ... */

	  file_size += SizeOfPhrase (len, kphtmp->count);

	  while (hzph != NULL)
	    {
	      hzphtmp = hzph;
	      hzph = hzph->next;

	      itemcount++;
	      fwrite (hzphtmp->hz, sizeof (char), len * 2, out);
	      fwrite (&(hzphtmp->freq), sizeof (hzphtmp->freq), 1, out);
	      free (hzphtmp);
	    }
	  free (kphtmp);
	}
    }

  fwrite (&file_size, sizeof (file_size), 1, out);
  printf ("FileSize=%d\tTotalItem=%d\n\n", file_size + sizeof (int),
	  itemcount);
  fclose (out);

  return 1;
}

int
LoadPhraseFromFile (char *pathname)
{
  FILE *stream;
  int i, j;
  char str[250];
  u_char len;
  u_char key[MAX_PHRASE_LEN + 1];
  unsigned short pykey[MAX_PHRASE_LEN];
  int count, ahead, flag = 0, freq;
  char strarr[MAX_PHRASE_LEN + 4][2 * MAX_PHRASE_LEN + 1];

  if ((stream = fopen (pathname, "r")) == NULL)
    {
      fprintf (stderr, "Couldn't open \"%s\".\n", pathname);
      exit (1);
    }

  while (!feof (stream))
    {
      if (fgets (str, 250, stream) != NULL)
	{
	  str[strlen (str) - 1] = '\0';
	  count = String2Array (str, 2 * MAX_PHRASE_LEN + 1, strarr);
	  len = strlen (strarr[0]) / 2;
	  /* len+1 = count, freq = 0
	     len+2 = count, freq = xx */
	  if ((len != count - 1 && len != count - 2) || len > MAX_PHRASE_LEN)
	    {
	      fprintf (stderr, "Phrase %s error!!! Found invalid len: %d\n",
		       str, len);
	      continue;
	    }

	  if (len == count - 2)
	    {
	      freq = atoi (strarr[count - 1]);
	      if (freq > 255)
		freq = 255;
	      count--;
	    }
	  else
	    freq = 0;

	  for (i = 1; i < count; i++)
	    {
	      ahead = (int) strarr[i][0] - 'a';
	      flag = 0;
	      if (ahead < 0 || ahead > 25)
		{
		  fprintf (stderr,
			   "Phrase %s error!!! Found illgal key: %d\n", str,
			   ahead);
		  break;
		}

	      for (j = 0; pytab[ahead][j].key; j++)
		{
		  if (!strcmp (pytab[ahead][j].py, strarr[i]))
		    {
		      pykey[i - 1] = pytab[ahead][j].key;
		      flag = 1;
		      break;
		    }
		}
	      if (!flag)
		break;
	    }			// for

	  if (!flag)
	    {
	      fprintf (stderr, "Phrase %s error!!!\n", str);
	      continue;
	    }
	  for (i = 0; i < len; i++)
	    key[i + 1] = pykey[i] & 0xff;

	  key[0] = '\0';
	  for (i = 0; i < len; i++)
	    key[0] |= (pykey[i] & 0x0100) >> (8 - i);

	  /*
	     printf("%s, len=%d, key0=%d, key1 =%d, key=%d\n",
	     str,len,(int)key[0],(int)key[1],(int)key[2]);
	   */

	  SavePhraseToMem (str, key, len, freq);
	}
    }

  fclose (stream);
  return (0);
}

/*
 * Usage: <program> <input_name> <output_name>
 *
 * Clears the phrase tables, loads pinyin.map from the first readable
 * candidate location, reads the phrase text file given as the first argument
 * and writes the binary phrase file named by the second argument.
 */
int
main (int argc, char **argv)
{
  /* candidate locations of pinyin.map, tried in this order */
  static char *map_paths[] = {
    "./pinyin.map",
    "/usr/lib/unicon/modules/cce/dict/pinyin.map",
    "/usr/local/lib/unicon/modules/cce/dict/pinyin.map"
  };
  const int npaths = (int) (sizeof (map_paths) / sizeof (map_paths[0]));
  int slot, which;

  if (argc != 3)
    {
      fprintf (stderr, "usage: %s <input_name> <output_name>\n", argv[0]);
      return 1;
    }

  for (slot = 0; slot < MAX_PY_NUM; slot++)
    {
      phcount[slot] = 0;
      phtab[slot] = NULL;
    }

  /* stop at the first map file we are allowed to read */
  which = 0;
  while (which < npaths && access (map_paths[which], R_OK) != 0)
    which++;

  if (which == npaths)
    {
      printf ("Sorry, couldn't find pinyin.map!\n");
      exit (-1);
    }
  LoadTable (map_paths[which]);

  LoadPhraseFromFile (argv[1]);
  SavePhraseToFile (argv[2]);

  return 0;
}
